from dataidea.packages import plt, sns, np, pd
Seaborn Part 2
The Python visualization library Seaborn is based on matplotlib and provides a high-level interface for drawing attractive statistical graphics.
Make use of the following aliases to import the libraries:
The basic steps to creating plots with Seaborn are
- Prepare some data
- Control figure aesthetics
- Plot with Seaborn
- Further customize your plot
# Load seaborn's built-in "tips" example dataset and preview its first rows.
tips = sns.load_dataset('tips')
tips.head()
| | total_bill | tip | sex | smoker | day | time | size |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Control figure aesthetics: use the "whitegrid" style.
sns.set_style("whitegrid")
# Plot total bill against tip with a fitted regression line.
g = sns.lmplot(x="tip", y="total_bill", data=tips, aspect=2)
# Further customize the plot: readable axis labels.
g.set_axis_labels("Tip", "Total bill(USD)")
plt.show()
Data
Seaborn also offers built-in data sets:
# Synthetic data: a 10x12 array of uniform random values.
uniform_data = np.random.rand(10, 12)
# Synthetic data: x runs from 1 to 100, y is drawn from a normal
# distribution with mean 0 and standard deviation 4.
data = pd.DataFrame({'x': np.arange(1, 101),
                     'y': np.random.normal(0, 4, 100)})
# Built-in example datasets used by the plots in the following sections.
titanic = sns.load_dataset("titanic")
iris = sns.load_dataset("iris")
# Create a figure with a single set of axes.
fig, ax = plt.subplots()
Plotting with Seaborn
Axis Grids
# Subplot grid for plotting conditional relationships:
# one column per "survived" value, one row per "sex" value.
g = sns.FacetGrid(titanic, col="survived", row="sex")
# Draw a histogram of "age" in every facet.
g = g.map(plt.hist, "age")
# Draw a categorical plot onto a FacetGrid
Subplot grid for plotting pairwise relationships
# Build the pairwise grid explicitly and map a scatter plot onto every cell.
h = sns.PairGrid(iris)
h = h.map(plt.scatter)
# pairplot is the one-call shortcut for plotting pairwise relationships.
sns.pairplot(iris)
plt.show()
Grid for bivariate plot with marginal univariate plots
# Joint grid over the synthetic frame: x against y with marginal axes.
i = sns.JointGrid(x="x",
                  y="y",
                  data=data)
# Regression plot in the centre, univariate distributions on the margins.
# NOTE: sns.distplot is deprecated (see the warning printed below); seaborn
# recommends histplot or displot as replacements.
i = i.plot(sns.regplot,
           sns.distplot)
/home/jumashafara/venvs/programming_for_data_science/lib/python3.10/site-packages/seaborn/axisgrid.py:1886: UserWarning:
`distplot` is a deprecated function and will be removed in seaborn v0.14.0.
Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).
For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
func(self.x, **orient_kw_x, **kwargs)
/home/jumashafara/venvs/programming_for_data_science/lib/python3.10/site-packages/seaborn/axisgrid.py:1892: UserWarning:
`distplot` is a deprecated function and will be removed in seaborn v0.14.0.
Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).
For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
func(self.y, **orient_kw_y, **kwargs)
# Plot data and regression model fits across a FacetGrid,
# with one fit per species (hue).
sns.lmplot(x="sepal_width",
           y="sepal_length",
           hue="species",
           x_ci='sd',
           data=iris)
plt.show()
Categorical Plots
Scatterplot
Scatterplot with one categorical variable
# Petal length per species, coloured by species.
sns.stripplot(x="species",
              y="petal_length",
              data=iris, hue='species')
plt.show()
Categorical scatterplot with non-overlapping points
# Same data as the strip plot, but points are placed so they do not overlap.
sns.swarmplot(x="species",
              y="petal_length",
              data=iris, hue='species')
plt.show()
/home/jumashafara/venvs/programming_for_data_science/lib/python3.10/site-packages/seaborn/categorical.py:3399: UserWarning: 12.0% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
Bar Chart
Show point estimates and confidence intervals as rectangular bars
# Survival by sex, split by passenger class.
sns.barplot(x="sex",
            y="survived",
            hue="class",
            data=titanic)
plt.show()
Count Plot
Show count of observations
# Number of passengers per deck, split by survival.
sns.countplot(x="deck",
              data=titanic,
              palette="Greens_d",
              hue='survived')
plt.show()
Point Plot
Show point estimates and confidence intervals with scatterplot glyphs
# Survival by class, one line per sex with its own colour, marker and
# line style.
sns.pointplot(x="class", y="survived", hue="sex", data=titanic,
              palette={"male":"g","female":"m"}, markers=["^","o"], linestyles=["-","--"])
plt.show()
Boxplot
# Box plot of age grouped by "alive", split by "adult_male".
sns.boxplot(x="alive", y="age", hue="adult_male", data=titanic)
plt.show()
# Horizontal box plots drawn directly from the iris DataFrame.
sns.boxplot(data=iris, orient="h")
plt.show()
Violin Plot
# Violin plot of age by sex, split by survival.
sns.violinplot(x="age",
               y="sex",
               hue="survived",
               data=titanic)
Distribution Plots
Plot univariate distribution
# Distribution of the synthetic y column without a KDE curve...
plot = sns.displot(data.y, kde=False, color="b")
# ...and the same distribution with the KDE curve enabled.
sns.displot(data.y, kde=True, color="b")
Matrix Plots
Heatmap
# Exclude non-numeric columns from correlation calculation
numeric_cols = tips.select_dtypes(include=['float64', 'int64'])
correlation_matrix = numeric_cols.corr()

# Plotting the heatmap, with each cell annotated by its value
sns.heatmap(correlation_matrix, annot=True)
plt.title('Correlation Heatmap')
plt.show()
# Show the current figure, then save it to disk.
# NOTE: the saves here run after the plot has already been shown, so they act
# on a new, empty figure (see the "0 Axes" output below). To save the plot
# itself, call plt.savefig before plt.show().
plt.show()
plt.savefig("foo.png")
# You can save a transparent figure
plt.savefig("foo.png", transparent=True)
<Figure size 640x480 with 0 Axes>
# Create a figure and axes to draw on
fig3, ax = plt.subplots()
# Create the regression plot on those axes
sns.regplot(x="petal_width", y="petal_length", data=iris, ax=ax)

# Display the plot
plt.show()